################
# TABLE 1
################
library(rstan)
library(stargazer)
library(data.table)
library(Amelia)
#load("~/Dropbox/Publications/Measuring Racial Bias/ISQ_Replication/Replication Files/Data/bias_replication.rdata")
load("~/Dropbox/Publications/Measuring Racial Bias/ISQ_Replication/Replication Files/Data/migration_model_data.RData")

vars <- c("log_flow", "log_stock", "distw", "contig", "colony", "comlang_off",
  "pop_d", "log_gdp_dif", "urate_d", "comcur", "lib_dem_o", "lib_dem_d",
  "year_ed_15_plus_o", "iso3_o", "iso3_d", "year", "region_o", 
  "region_d", "dyad", "gdpcap_o", "gdpcap_d", "cw_o")
d <- bilateral_migration_data[, vars, with = FALSE]
d <- na.omit(d)
ref_table <- d[, .(dyad, year, log_flow)]
d$log_dist_w <- log(d$distw)
d$log_pop_d <- log(d$pop_d)
d$log_urate_d <- log(d$urate_d)
d$log_gdpcap_o <- log(d$gdpcap_o)
d$log_gdpcap_d <- log(d$gdpcap_d)
d$log_gdpcap_o2 <- d$log_gdpcap_o^2
d$log_gdpcap_d2 <- d$log_gdpcap_d^2
d$cw_o <- ifelse(is.na(d$cw_o), 0, d$cw_o)
y <- d$log_flow
year <- as.numeric(factor(d$year))
iso3_d <- as.numeric(factor(d$iso3_d))
iso3_o <- as.numeric(factor(d$iso3_o))
d[, c("log_flow", "iso3_o", "iso3_d", "year", "region_o", "region_d",
      "pop_d", "urate_d", "distw", "dyad", "gdpcap_o", "gdpcap_d") := NULL]
X <- as.matrix(d)
for(k in 1:ncol(d)){
  X[, k] <- arm::rescale(X[, k], binary.inputs = "0/1")
}
X <- cbind(1, X)
X <- apply(X, 2, as.numeric)

dl <- list(
  K = ncol(X),
  N = nrow(X),
  y_obs = y,
  X = X,
  N_d = max(iso3_d),
  N_o = max(iso3_o),
  i_d = iso3_d,
  i_o = iso3_o,
  N_year = max(year),
  i_year = year
)

sm <- stan_model("~/Dropbox/Publications/Measuring Racial Bias/ISQ_Replication/Replication Files/Replication/weibull_hurdle_6.stan")
sf <- vb(sm, data = dl, output_samples = 1000, seed = 570175513)
ex <- rstan::extract(sf)

####################
# Figure 1
####################
plot(density(dl$y_obs), lwd = 2, xlab = "Log Flows",
     main = "Predictive Check of Model for Migration Flows", lty = "dotted")
for(s in 1:1000){
  lines(density(ex$y_rep[s, ]), col = scales::alpha("red", 0.15), lwd = 2)
}


####################
# Imputation
####################
vars <- c("log_flow", "log_stock", "distw", "contig", "colony", "comlang_off",
          "pop_d", "log_gdp_dif", "urate_d", "comcur", "lib_dem_o", "lib_dem_d",
          "year_ed_15_plus_o", "iso3_o", "iso3_d", "year", "region_o", 
          "region_d", "gdpcap_o", "gdpcap_d", "cw_o")
d <- bilateral_migration_data[, vars, with = FALSE]
d$log_dist_w <- log(d$distw)
d$log_pop_d <- log(d$pop_d)
d$log_urate_d <- log(d$urate_d)
d$log_gdpcap_o <- log(d$gdpcap_o)
d$log_gdpcap_d <- log(d$gdpcap_d)
d$log_gdpcap_o2 <- d$log_gdpcap_o^2
d$log_gdpcap_d2 <- d$log_gdpcap_d^2
d$year <- as.numeric(d$year)
d[, c("region_o", "region_d",
      "pop_d", "urate_d", "distw", "gdpcap_o", "gdpcap_d") := NULL]
# fit 5 imputed datasets 
imp_amelia <- amelia(x = d, m = 5, 
  idvars = c("iso3_o", "iso3_d"), ts = "year")

year <- list()
iso3o <- list()
iso3d <- list()
ys <- list()
xs <- list()
for(i in 1:5)
{
  # tmp1 <- eval(parse(text = paste0("imp_amelia$imputations$imp", i, "$year")))
  year[[i]] <- as.numeric(factor(eval(parse(text = 
    paste0("imp_amelia$imputations$imp", i, "$year")))))
  iso3d[[i]] <- as.numeric(factor(eval(parse(text = 
    paste0("imp_amelia$imputations$imp", i, "$iso3_d")))))
  iso3o[[i]] <- as.numeric(factor(eval(parse(text = 
    paste0("imp_amelia$imputations$imp", i, "$iso3_o")))))
  ys[[i]] <- eval(parse(text = paste0("imp_amelia$imputations$imp", 
    i, "$log_flow")))
  X <- as.matrix(eval(parse(text = paste0("imp_amelia$imputations$imp", i))))
  X <- X[, -c(1, 11:13)]
  for(k in 1:ncol(X)){
    X[, k] <- arm::rescale(X[, k], binary.inputs = "0/1") 
  }
  X <- cbind(1, X)
  xs[[i]] <- apply(X, 2, as.numeric)
}

dl1 <- list(
  K = ncol(xs[[1]]),
  N = nrow(xs[[1]]),
  y_obs = ys[[1]],
  X = xs[[1]],
  N_d = max(iso3d[[1]]),
  N_o = max(iso3o[[1]]),
  i_d = iso3d[[1]],
  i_o = iso3o[[1]],
  N_year = max(year[[1]]),
  i_year = year[[1]]
)

dl2 <- list(
  K = ncol(xs[[2]]),
  N = nrow(xs[[2]]),
  y_obs = ys[[2]],
  X = xs[[2]],
  N_d = max(iso3d[[2]]),
  N_o = max(iso3o[[2]]),
  i_d = iso3d[[2]],
  i_o = iso3o[[2]],
  N_year = max(year[[2]]),
  i_year = year[[2]]
)
dl3 <- list(
  K = ncol(xs[[3]]),
  N = nrow(xs[[3]]),
  y_obs = ys[[3]],
  X = xs[[3]],
  N_d = max(iso3d[[3]]),
  N_o = max(iso3o[[3]]),
  i_d = iso3d[[3]],
  i_o = iso3o[[3]],
  N_year = max(year[[3]]),
  i_year = year[[3]]
)
dl4 <- list(
  K = ncol(xs[[4]]),
  N = nrow(xs[[4]]),
  y_obs = ys[[4]],
  X = xs[[4]],
  N_d = max(iso3d[[4]]),
  N_o = max(iso3o[[4]]),
  i_d = iso3d[[4]],
  i_o = iso3o[[4]],
  N_year = max(year[[4]]),
  i_year = year[[4]]
)
dl5 <- list(
  K = ncol(xs[[5]]),
  N = nrow(xs[[5]]),
  y_obs = ys[[5]],
  X = xs[[5]],
  N_d = max(iso3d[[5]]),
  N_o = max(iso3o[[5]]),
  i_d = iso3d[[5]],
  i_o = iso3o[[5]],
  N_year = max(year[[5]]),
  i_year = year[[5]]
)

sm <- stan_model("~/Dropbox/Publications/Measuring Racial Bias/ISQ_Replication/Replication Files/Replication/weibull_hurdle_6.stan")

sf1 <- vb(sm, data = dl1, output_samples = 1000)
sf2 <- vb(sm, data = dl2, output_samples = 1000)
sf3 <- vb(sm, data = dl3, output_samples = 1000)
sf4 <- vb(sm, data = dl4, output_samples = 1000)
sf5 <- vb(sm, data = dl5, output_samples = 1000)
ex1 <- rstan::extract(sf1, pars = c("beta", "resids"))
ex2 <- rstan::extract(sf2, pars = c("beta", "resids"))
ex3 <- rstan::extract(sf3, pars = c("beta", "resids"))
ex4 <- rstan::extract(sf4, pars = c("beta", "resids"))
ex5 <- rstan::extract(sf5, pars = c("beta", "resids"))


betas <- rbind(ex1$beta, ex2$beta, ex3$beta, ex4$beta, ex5$beta)
pars <- t(apply(betas, 2, quantile, probs = c(0.5, 0.10, 0.9)))
pars <- data.table(pars)
colnames(X)[1] <- "(Intercept)"
pars[, param := c(colnames(X))]
setnames(pars, c("est", "lower", "upper", "pars"))


s <- summary(sf)
fit_table <- s$summary[1:18,]
rownames(fit_table) <- colnames(X)
rownames(fit_table)[1] <- "(Intercept)"


ind_vars <- paste(colnames(X[,-1]), collapse = "+")
form <- as.formula(paste("y", "~", ind_vars))
m1 <- lm(form, data = data.frame(X))
stargazer(m1, style = "apsr",
  covariate.labels = c("Log Migrant Stock", "Shared Border",
    "Colonial Relationship", "Common Language", "Log GDP Difference",
    "Common Currency", "Lib. Dem. Origin", "Lib. Dem. Dest.", "Origin Ed.",
    "Civil War Origin", "Log Distance", "Log Destination Population",
    "Log Destination Unemployment", "Log GDP per Capita Origin",
    "Log GDP per Capita Destination",
    "Log GDP per Capita Origin$^2$",
    "Log GDP per Capita Destination$^2$", "Intercept"),
  align = TRUE,
  column.sep.width = "2pt",
  dep.var.caption = "",
  omit.table.layout = "n",
  dep.var.labels = "Bilateral Migration",
  add.lines = list(
  c("Period RE", "\\checkmark"),
  c("Origin RE", "\\checkmark"),
  c("Destination RE", "\\checkmark"),
  c("Origin-Period RE", "\\checkmark"),
  c("Destination-Periodß RE", "\\checkmark"),
  c("N", "152,880"),
  c("MSE", "7.2")),
  ci.custom = list(cbind(fit_table[, 3], fit_table[, 7])),
  coef = list(fit_table[, 1]),
  keep.stat = "n",
  star.cutoffs = NA)
